Get Data

Import the expense data from a CSV file.

  # Load the packages this section relies on in a single p_load() call.
  p_load(readr, stringi)

  # Read the expense table; the echoed column specification shows that every
  # column is parsed as a double.
  expenses <- read_csv(file = "XMasExpenses.csv")
## Parsed with column specification:
## cols(
##   Total = col_double(),
##   Food = col_double(),
##   Gifts = col_double(),
##   Elect = col_double()
## )
  # Preview the first rows with every value rounded to whole numbers.
  head(round(expenses, digits = 0))
## # A tibble: 6 x 4
##   Total  Food Gifts Elect
##   <dbl> <dbl> <dbl> <dbl>
## 1   668    33   252    96
## 2   263    27   128   104
## 3   419    39   201    85
## 4   273    37   219    97
## 5   191    25   154    84
## 6   262    40   225    73

Simple Descriptives

A few summary plots and numerical descriptive statistics follow.

  # Per-column quartiles, mean, range and NA counts.
  summary(object = expenses)
##      Total            Food           Gifts           Elect       
##  Min.   :  0.0   Min.   : 0.00   Min.   :  0.0   Min.   :  0.00  
##  1st Qu.:265.6   1st Qu.:24.76   1st Qu.:178.5   1st Qu.: 74.85  
##  Median :335.8   Median :30.13   Median :215.3   Median : 88.89  
##  Mean   :363.3   Mean   :30.09   Mean   :215.2   Mean   : 88.60  
##  3rd Qu.:452.9   3rd Qu.:35.34   3rd Qu.:252.3   3rd Qu.:102.42  
##  Max.   :788.0   Max.   :57.76   Max.   :398.3   Max.   :166.46  
##                  NA's   :1       NA's   :12
  # lattice supplies the bwplot() and xyplot() calls used from here on.
  p_load(lattice)

  # Scatterplot matrix of every pair of columns.
  pairs(x = expenses)

  # Box-and-whisker plots of the Total and Elect columns.
  bwplot(~ Total, data = expenses)

  bwplot(~ Elect, data = expenses)

  # Total plotted against Food.
  xyplot(Total ~ Food, data = expenses)

Models

We now fit a few linear models for Total, adding one predictor at a time.

  # Model 1: Total explained by Elect alone, followed by a residuals-vs-fitted
  # plot. The fitted lm object is passed as `data`, so `residuals` and
  # `fitted.values` are looked up among its components.
  expenses.lm.Elect <- lm(formula = Total ~ Elect, data = expenses)
  xyplot(residuals ~ fitted.values, data = expenses.lm.Elect, aspect = 1)

  # Model 2: Food added as a second predictor, with the same diagnostic plot.
  expenses.lm.ElectFood <- lm(formula = Total ~ Elect + Food, data = expenses)
  xyplot(residuals ~ fitted.values, data = expenses.lm.ElectFood, aspect = 1)

  # Model 3: all three expense columns as predictors, then the coefficient table.
  expenses.lm.full <- lm(
    formula = Total ~ Elect + Food + Gifts,
    data = expenses
  )
  summary(expenses.lm.full)
## 
## Call:
## lm(formula = Total ~ Elect + Food + Gifts, data = expenses)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -207.50  -86.88  -22.61   43.10  327.16 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -288.93241    6.99335 -41.315  < 2e-16 ***
## Elect          4.23863    0.04590  92.355  < 2e-16 ***
## Food           1.03324    0.13045   7.921 2.48e-15 ***
## Gifts          1.14150    0.02152  53.054  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 110 on 20425 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.3111, Adjusted R-squared:  0.311 
## F-statistic:  3075 on 3 and 20425 DF,  p-value: < 2.2e-16
  # Residuals of the full model against Gifts. The fit dropped the rows with
  # missing values (see "observations deleted due to missingness" above), so
  # its residual vector is shorter than expenses$Gifts. Gifts is therefore
  # taken from the model frame stored in the fit (`model`), which holds exactly
  # the rows that were used, keeping each residual paired with its own Gifts
  # value.
  xyplot(residuals ~ model$Gifts, data = expenses.lm.full, xlab = "Gifts")

  # Residuals against fitted values for the full model, drawn with small green
  # points because of the large number of observations.
  xyplot(
    residuals ~ fitted.values,
    data = expenses.lm.full,
    aspect = 1, cex = 0.25, col = "green"
  )